plus sc10x

run cellranger with Spp1-tdt mod reference index

exogenous genes:
CreERT2 (following the final Spp1-exon which is not complete, might be hardly detected in this time point)
tdTomato (further fixed: separated as head(1-280)/mid(300-1200)/tail(1600-2025))

tdTomato-head signal in 10x may represent former structure with ‘stop codon’ instead of Cre-edited tdTomato-transcript
tdTomato-mid signal cover the up-half of tdTomato, not sure how it’s sequenced, but seems like real tdt signal
tdTomato-tail signal within final 3-400bp of complete tdTomato structure should be the expected tdTomato-transcript

indeed, tdt-head highly enriched in CTR/MIG samples,
however, tdt-mid got much higher than tdt-tail, they both expressed like real tdt signal

load dependancies

loading 70k cells, CX3CR1+
plus, cellranger called 39,986 cells

load 10x data

filt.10x <- Read10X(data.dir = "../output_plus_fixed/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32289 39989
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGAACAAGG-1 AAACCCAAGCCGCTTG-1 AAACCCAAGGTCGACA-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGGTTCTTG-1 AAACCCAAGTCGGCCT-1 AAACCCAAGTCTGCGC-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]    10 39989
FB[,1:6]
## 10 x 6 sparse Matrix of class "dgCMatrix"
##              AAACCCAAGAACAAGG-1 AAACCCAAGCCGCTTG-1 AAACCCAAGGTCGACA-1
## P30.PBS.TDT                   .                  8                  4
## P30.PBS.MIG                   7                  7                 11
## P30.PBS.CTR                   2                  2                  6
## P30.LPS.TDT1                  4                 53                 11
## P30.LPS.TDT2                  3                  1                  3
## P30.LPS.MIG                   2                 44                  6
## P30.LPS.CTR                 116                  3                120
## P06.TDT                       4                  4                  5
## P06.MIG                     146                  6                  4
## P06.CTR                       6                  2                  4
##              AAACCCAAGGTTCTTG-1 AAACCCAAGTCGGCCT-1 AAACCCAAGTCTGCGC-1
## P30.PBS.TDT                   1                  4                 12
## P30.PBS.MIG                  10                 10                 23
## P30.PBS.CTR                   2                  6                 10
## P30.LPS.TDT1                 81                  8                 38
## P30.LPS.TDT2                  2                  4                 12
## P30.LPS.MIG                   1                 15                 15
## P30.LPS.CTR                   3                  5                  5
## P06.TDT                       2                366                172
## P06.MIG                       7                 12                 12
## P06.CTR                       4                  3                  8
rowSums(FB)
##  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 P30.LPS.TDT2  P30.LPS.MIG 
##       581785      1813218       790490       936583       525620       803207 
##  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##       565858       857151      1115753       802773
rowSums(FB>0)
##  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 P30.LPS.TDT2  P30.LPS.MIG 
##        38815        39925        38801        39888        37004        38668 
##  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##        37610        38997        39378        37657

FB

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Spp1tdt")

FB.seur
## An object of class Seurat 
## 10 features across 39989 samples within 1 assay 
## Active assay: RNA (10 features, 0 variable features)
rownames(FB.seur)
##  [1] "P30.PBS.TDT"  "P30.PBS.MIG"  "P30.PBS.CTR"  "P30.LPS.TDT1" "P30.LPS.TDT2"
##  [6] "P30.LPS.MIG"  "P30.LPS.CTR"  "P06.TDT"      "P06.MIG"      "P06.CTR"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

scales::show_col(ggsci::pal_igv("default")(49))

color.FB <- ggsci::pal_igv("default")(49)[c(8,32,40,
                                            34,26,33,28,
                                            2,43,18)]

#2,13,33,1,15,
#34,26,28,40,18

level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 5)

scales::show_col(color.FB, ncol = 5)

par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
plot(sort(t(FB)[,9]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
plot(sort(t(FB)[,10]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])

range ‘q’

q0.50 ~ 0.99
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("P30|P06",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])

plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])
plot(table.demux[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux)[2+9])
plot(table.demux[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux)[2+10])

q0.9900 ~ 0.9999
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("P30|P06",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])

plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])
plot(table.demux.s[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux.s)[2+9])
plot(table.demux.s[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux.s)[2+10])

demultiplexing

(just using q0.999 as an optimal cutoff for all except tag4 would take q0.996)

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for P30.PBS.TDT : 15 reads
## Cutoff for P30.PBS.MIG : 34 reads
## Cutoff for P30.PBS.CTR : 20 reads
## Cutoff for P30.LPS.TDT1 : 34 reads
## Cutoff for P30.LPS.TDT2 : 11 reads
## Cutoff for P30.LPS.MIG : 17 reads
## Cutoff for P30.LPS.CTR : 12 reads
## Cutoff for P06.TDT : 16 reads
## Cutoff for P06.MIG : 24 reads
## Cutoff for P06.CTR : 16 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    13347     1608    25034
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##      Doublet     Negative  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 
##        13347         1608         2494         2582         2413         2486 
## P30.LPS.TDT2  P30.LPS.MIG  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##         2657         2475         2579         2630         2452         2266

tag4 would take q0.996 cutoff

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for P30.PBS.TDT : 17 reads
## Cutoff for P30.PBS.MIG : 38 reads
## Cutoff for P30.PBS.CTR : 23 reads
## Cutoff for P30.LPS.TDT1 : 39 reads
## Cutoff for P30.LPS.TDT2 : 13 reads
## Cutoff for P30.LPS.MIG : 19 reads
## Cutoff for P30.LPS.CTR : 14 reads
## Cutoff for P06.TDT : 18 reads
## Cutoff for P06.MIG : 28 reads
## Cutoff for P06.CTR : 19 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12691     1728    25570
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##      Doublet     Negative  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 
##        12691         1728         2561         2641         2462         2532 
## P30.LPS.TDT2  P30.LPS.MIG  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##         2705         2538         2639         2693         2497         2302
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.999)
## Cutoff for P30.PBS.TDT : 20 reads
## Cutoff for P30.PBS.MIG : 45 reads
## Cutoff for P30.PBS.CTR : 28 reads
## Cutoff for P30.LPS.TDT1 : 47 reads
## Cutoff for P30.LPS.TDT2 : 16 reads
## Cutoff for P30.LPS.MIG : 23 reads
## Cutoff for P30.LPS.CTR : 16 reads
## Cutoff for P06.TDT : 21 reads
## Cutoff for P06.MIG : 34 reads
## Cutoff for P06.CTR : 23 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12092     1924    25973
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##      Doublet     Negative  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 
##        12092         1924         2609         2690         2485         2520 
## P30.LPS.TDT2  P30.LPS.MIG  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##         2759         2569         2684         2742         2553         2362
# tags q0.999
#   tag4 would take q0.996 cutoff  47->39
cutoff.FB <- c(20,45,28,39,16,
               23,16,21,34,23)

names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
##  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 P30.LPS.TDT2  P30.LPS.MIG 
##           20           45           28           39           16           23 
##  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##           16           21           34           23
par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
plot(sort(t(FB)[,9]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
abline(h=cutoff.FB[9],col = color.FB[9])
plot(sort(t(FB)[,10]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])
abline(h=cutoff.FB[10],col = color.FB[10])

custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
dim(df.FB)
## [1] 39989     2
df.FB[1:10,]
##                    HTO_classification.global      hash.ID
## AAACCCAAGAACAAGG-1                   Doublet      Doublet
## AAACCCAAGCCGCTTG-1                   Doublet      Doublet
## AAACCCAAGGTCGACA-1                   Singlet  P30.LPS.CTR
## AAACCCAAGGTTCTTG-1                   Singlet P30.LPS.TDT1
## AAACCCAAGTCGGCCT-1                   Singlet      P06.TDT
## AAACCCAAGTCTGCGC-1                   Singlet      P06.TDT
## AAACCCAAGTTTGGCT-1                   Singlet P30.LPS.TDT2
## AAACCCACAAACTCGT-1                   Singlet  P30.PBS.MIG
## AAACCCACAACCCGCA-1                   Doublet      Doublet
## AAACCCACAAGACAAT-1                  Negative     Negative
table(df.FB$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    12192     1845    25952
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative P30.PBS.TDT P30.PBS.MIG P30.PBS.CTR
##                  Doublet    12192        0           0           0           0
##                  Negative       0     1845           0           0           0
##                  Singlet        0        0        2604        2679        2474
##                          hash.ID
## HTO_classification.global P30.LPS.TDT1 P30.LPS.TDT2 P30.LPS.MIG P30.LPS.CTR
##                  Doublet             0            0           0           0
##                  Negative            0            0           0           0
##                  Singlet          2596         2748        2564        2667
##                          hash.ID
## HTO_classification.global P06.TDT P06.MIG P06.CTR
##                  Doublet        0       0       0
##                  Negative       0       0       0
##                  Singlet     2731    2540    2349
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAACAAGG-1    Spp1tdt        290            9        290            9
## AAACCCAAGCCGCTTG-1    Spp1tdt        130           10        130           10
## AAACCCAAGGTCGACA-1    Spp1tdt        174           10        174           10
## AAACCCAAGGTTCTTG-1    Spp1tdt        113           10        113           10
##                       HTO_maxID HTO_secondID HTO_margin
## AAACCCAAGAACAAGG-1  P30.LPS.CTR      P06.MIG  0.2214566
## AAACCCAAGCCGCTTG-1  P30.LPS.MIG P30.LPS.TDT1  0.2982220
## AAACCCAAGGTCGACA-1  P30.LPS.CTR P30.LPS.TDT1  2.4603460
## AAACCCAAGGTTCTTG-1 P30.LPS.TDT1      P06.MIG  1.4617204
##                          HTO_classification HTO_classification.global
## AAACCCAAGAACAAGG-1      P06.MIG_P30.LPS.CTR                   Doublet
## AAACCCAAGCCGCTTG-1 P30.LPS.MIG_P30.LPS.TDT1                   Doublet
## AAACCCAAGGTCGACA-1              P30.LPS.CTR                   Singlet
## AAACCCAAGGTTCTTG-1             P30.LPS.TDT1                   Singlet
##                         hash.ID
## AAACCCAAGAACAAGG-1      Doublet
## AAACCCAAGCCGCTTG-1      Doublet
## AAACCCAAGGTCGACA-1  P30.LPS.CTR
## AAACCCAAGGTTCTTG-1 P30.LPS.TDT1
## if all using same q, run this chunk instead of several custom ones above
#
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,30500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1680,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=5,
          cols = color.FB) 

with ridge plots, filtered
#FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=5,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:10, perplexity = 500, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB1006.seur.subset.rds")
FB.seur.subset <- readRDS("FB1006.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order =  rev(rownames(FB.seur)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = rev(c("Negative",rownames(FB.seur),"Doublet")), 
        group.by = 'hash.ID', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)

generally looks good,
a few remained putative doublets need to filter out in GEX part

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))

VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)

table(FB.seur@meta.data$hash.ID)
## 
##      Doublet     Negative  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 
##        12192         1845         2604         2679         2474         2596 
## P30.LPS.TDT2  P30.LPS.MIG  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##         2748         2564         2667         2731         2540         2349
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID)
barplot(sl_stat,ylim = c(0,16800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+1075,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "Spp1tdt")
GEX.seur
## An object of class Seurat 
## 17900 features across 39989 samples within 1 assay 
## Active assay: RNA (17900 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##      Doublet     Negative  P30.PBS.TDT  P30.PBS.MIG  P30.PBS.CTR P30.LPS.TDT1 
##        12192         1845         2604         2679         2474         2596 
## P30.LPS.TDT2  P30.LPS.MIG  P30.LPS.CTR      P06.TDT      P06.MIG      P06.CTR 
##         2748         2564         2667         2731         2540         2349
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat 
## 17900 features across 25952 samples within 1 assay 
## Active assay: RNA (17900 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 10 & nFeature_RNA > 800 & nFeature_RNA < 4500 & nCount_RNA < 16000)
GEX.seur
## An object of class Seurat 
## 17900 features across 23829 samples within 1 assay 
## Active assay: RNA (17900 features, 0 variable features)

filtered obj

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID)
barplot(sl_stat,ylim = c(0,15800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+1075,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,3800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+245,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

P06 has much more cycling

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1600)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 300)
##   [1] "Saa3"         "Ccl4"         "Cxcl10"       "Ccl3"         "Ccl5"        
##   [6] "Cd74"         "Marco"        "H2-Aa"        "H2-Ab1"       "Hist1h1b"    
##  [11] "Cxcl13"       "H2-Eb1"       "Apoe"         "Mki67"        "Spp1"        
##  [16] "Lcn2"         "Ch25h"        "Top2a"        "Ccl2"         "Cxcl9"       
##  [21] "Ccl7"         "Xist"         "Pf4"          "Ccl12"        "Hist1h2ap"   
##  [26] "Rsad2"        "Il1b"         "Hist1h2ae"    "Cenpf"        "Lyve1"       
##  [31] "Lyz2"         "Cd69"         "Ube2c"        "Lgals3"       "Apoc1"       
##  [36] "F13a1"        "S100a6"       "S100a4"       "Hmgb2"        "Egr1"        
##  [41] "Prc1"         "Stmn1"        "Fabp5"        "Cybb"         "Cd163"       
##  [46] "Pclaf"        "Iigp1"        "Ms4a7"        "Acod1"        "Il12b"       
##  [51] "Ifit3"        "Ifi27l2a"     "Cp"           "Lpl"          "Slc13a3"     
##  [56] "Mrc1"         "Hist1h3c"     "Hmmr"         "Cenpa"        "Hba-a1"      
##  [61] "Nusap1"       "Cd209f"       "Plac8"        "Hba-a2"       "Ifit2"       
##  [66] "Birc5"        "Hist1h2ab"    "Hbb-bs"       "Cd72"         "Ifit1"       
##  [71] "Wfdc21"       "Egr3"         "Ear2"         "Ifitm3"       "Tnf"         
##  [76] "Hp"           "Stmn3"        "Fn1"          "Gm26917"      "Crip1"       
##  [81] "Atp1b1"       "Napsa"        "Itga4"        "Cenpe"        "Meg3"        
##  [86] "Fpr1"         "Pcsk1n"       "Syt1"         "Ly6k"         "Fth1"        
##  [91] "Wfdc17"       "Kif11"        "Gpr84"        "Lgals1"       "Gm26885"     
##  [96] "Sncb"         "Aldh1a3"      "Knl1"         "Adgre5"       "Ccl8"        
## [101] "Ahnak"        "Stmn2"        "Cplx1"        "Tpx2"         "Ly6i"        
## [106] "Pbk"          "Hbb-bt"       "Clec4d"       "Gm50255"      "Plbd1"       
## [111] "Aspm"         "Ccr2"         "Ace"          "Cd55"         "Olfr889"     
## [116] "Ms4a6c"       "Bex2"         "Cdca8"        "Atf3"         "Cxcl2"       
## [121] "Caly"         "Hist1h1e"     "Esco2"        "Olfr1369-ps1" "Sparcl1"     
## [126] "Ccr7"         "Nap1l5"       "Kif23"        "Trap1a"       "Ptgds"       
## [131] "Ly6h"         "Clec4e"       "AW112010"     "H2afx"        "Snhg11"      
## [136] "Gad2"         "Spn"          "Arg1"         "Vim"          "Cd300e"      
## [141] "Rrm2"         "Clec10a"      "Smc4"         "Ifitm6"       "Cdca3"       
## [146] "Cd38"         "Nrxn3"        "Ccr1"         "Mgl2"         "Ccnb1"       
## [151] "Dennd5b"      "Spc24"        "Cdkn1a"       "Alas2"        "C1qtnf4"     
## [156] "Ccnb2"        "Robo2"        "Isg15"        "Wdcp"         "Fxyd2"       
## [161] "Kif15"        "Dqx1"         "Lockd"        "Gm10457"      "Nrxn1"       
## [166] "Ftl1"         "Dlx6os1"      "Ifi211"       "Cxcl14"       "Loxl2"       
## [171] "Tnfaip2"      "Rian"         "Smc2"         "Plk1"         "Cdk1"        
## [176] "Ptges"        "Ifit3b"       "Itgal"        "Gpr141"       "Tceal5"      
## [181] "Fcna"         "Gdf15"        "Cd52"         "Cfp"          "Cd79a"       
## [186] "Ccnd2"        "Hist1h4d"     "Sirpb1c"      "Cxcl16"       "Plp1"        
## [191] "Cbr2"         "H2afz"        "Apoc4"        "Racgap1"      "Ccna2"       
## [196] "Tspyl4"       "Treml4"       "Ifi204"       "Ndrg4"        "Gm42047"     
## [201] "Kcnk2"        "Nrgn"         "Igf1"         "Lilrb4a"      "Sox11"       
## [206] "Pla2g7"       "Alcam"        "Cyfip2"       "Mis18bp1"     "Hsp90ab1"    
## [211] "Oasl1"        "Ndn"          "Hist2h2ac"    "Pcdh7"        "Kif20b"      
## [216] "Olfm1"        "Cd36"         "Gadd45b"      "Cdc20"        "Mmp12"       
## [221] "Slfn5"        "Tmsb10"       "Kcnk4"        "Sorbs2"       "Tubb3"       
## [226] "Tk1"          "Mt1"          "Fabp4"        "Tac1"         "Il10"        
## [231] "Slc6a1"       "Pbld1"        "Lrp1b"        "Gabrg2"       "Hist1h1a"    
## [236] "Scg2"         "Gm31814"      "Ckap2l"       "Hspa5"        "Bst2"        
## [241] "Nr4a1"        "Snca"         "Nfasc"        "Timd4"        "Dab2"        
## [246] "Adarb2"       "Arx"          "Nuf2"         "Ifi203"       "Epha5"       
## [251] "Ifi207"       "Cd63"         "Saa1"         "Anln"         "Msr1"        
## [256] "Vpreb3"       "Crybb1"       "Sgo2a"        "Ifnb1"        "Gpm6a"       
## [261] "Srgn"         "Bub1b"        "Nfib"         "Clec4n"       "Kcnq1ot1"    
## [266] "Snap25"       "Tlr2"         "C4b"          "Ifitm2"       "Ndc80"       
## [271] "Dnm1"         "Hist1h1d"     "Dock5"        "Gm8251"       "Lhfpl4"      
## [276] "Ifi44"        "Nsg1"         "Gng3"         "Ms4a4c"       "Sct"         
## [281] "Neil3"        "Gm29773"      "Rhox5"        "Pou4f1"       "Tsix"        
## [286] "Cytip"        "Myt1l"        "Sphkap"       "Egr2"         "Fpr2"        
## [291] "Ier3"         "Cpd"          "Spc25"        "Gpnmb"        "Tro"         
## [296] "Peg3"         "Jaml"         "Cntfr"        "Gabrg3"       "Rab27b"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 
#   add sex-related Xist/Tsix

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|Egr1|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AC|^AI|^AA|^AW|^AY|^BC|^Gm|^Hist|Rik$|-ps|Xist|Tsix|^Ifi|^Isg|^Mcm",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)

VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  P2ry12, Gpr34, Selenop, Hpgd, Crybb1, Ltc4s, Maf, Fau, Ramp1, Arsb 
##     Sox4, Cox7a2l, Tpt1, Lst1, Cd9, Rcbtb2, Lrp1, Ctsd, Serpine2, Apoe 
##     Klhl24, Sgk1, Slc12a2, Igfbp4, Gnas, Lsp1, Npnt, Cbfa2t3, Ccl6, Hmgb1 
## Negative:  Ccl12, Bst2, Ccl2, Ms4a6c, Ctsc, Srgn, Lgals3bp, Fth1, Slfn2, Gpr84 
##     Ly6e, Fcgr4, Calr, Tspo, Ms4a6d, Sdf2l1, Rtp4, Fcgr1, Manf, Slfn5 
##     C5ar1, Pdia6, Pdia3, Cxcl16, Trim30a, Cybb, Nme2, Naaa, Cebpb, Ctsz 
## PC_ 2 
## Positive:  Pclaf, Prc1, Knl1, Pbk, Spc24, Kif15, Lockd, Ccna2, Racgap1, Neil3 
##     Esco2, Bub1b, Tk1, Aspm, Sgo2a, Stmn1, Mis18bp1, Ccnb1, Ccnf, Shcbp1 
##     Spc25, Plk1, Cit, Knstrn, H2afx, Ncapg, Trim59, Sgo1, Kif4, Cdkn3 
## Negative:  Ctsd, Calr, Fcgr1, Pmp22, Fcgr2b, Cd63, Pdia6, Gpr34, Ctsl, Srgn 
##     Spint1, Ccr5, Ms4a6d, Ccl12, P2ry12, Herpud1, Abca1, C5ar1, Cebpd, Ly6e 
##     Trim30d, Ddit4, Ctsz, Socs3, Ms4a6b, Nav3, Tent5a, Pdia3, Csmd3, Il4ra 
## PC_ 3 
## Positive:  Cybb, Ch25h, Apoe, Ccl3, Aoah, Cp, Fau, Cd52, Clec4a1, Cd72 
##     Tpt1, Fpr1, Ccl4, Ftl1, Ccl5, Ehd1, Saa3, Lilrb4a, H2-D1, Itgal 
##     C3, Lyz2, Marco, Cd69, Fpr2, Acod1, Cxcl10, Prdx5, Mcemp1, Bcl2a1d 
## Negative:  Ly6e, Spint1, Il4ra, Ccr5, Calr, Sdf2l1, Pdia6, Fcgr1, Manf, Nav3 
##     Gbp7, Pdia3, Srgn, Gadd45g, Fcgr2b, Trim30a, Ccl12, Cct6a, Ctsd, Cd244a 
##     C5ar1, Slfn2, Cdc42ep3, Naaa, Tuba1b, Cebpd, Socs3, Runx1, Nop56, Cebpb 
## PC_ 4 
## Positive:  Tpt1, Fau, Cfl1, Rbm3, Apoe, Ctsl, Nme2, Npm1, Ftl1, Gas5 
##     Ptma, Emp3, Zfas1, Cd63, Gatm, Cd52, Igf1, Tspo, Gapdh, Chd4 
##     Cox7a2l, Igfbp4, Gnas, Apoc1, Klf2, Gpx3, Lyz2, Ccr1, Fxyd5, Lgals1 
## Negative:  Ch25h, Cxcl10, Pmp22, Ccl4, Cd69, Saa3, Ctsd, Ccl3, P2ry12, Gpr34 
##     Rsad2, Il7r, Fpr1, Ccl5, Acod1, Cybb, Oasl1, Marco, Ccl7, Cp 
##     Cd9, Csmd3, Aoah, Tnfaip2, Iigp1, Btg2, Upk1b, Zbtb20, Maf, H2-Aa 
## PC_ 5 
## Positive:  Ctsz, Cd9, Pmp22, Ctsd, Lilrb4a, Lpl, Gnl3, Ccl3, C3ar1, C5ar1 
##     Cd63, Tnfaip2, Nop56, Plaur, Lgals3, Tlr2, Cadm1, Mif, Ncl, Ftl1 
##     Ccl4, Msr1, Tnf, Gpr84, Ehd1, Pdgfa, Gapdh, Cd83, Cxcl16, Cpd 
## Negative:  Iigp1, Rsad2, Oasl2, Slfn5, Usp18, Oasl1, Irf7, Cmpk2, Herc6, Parp14 
##     Fgl2, Stat1, Gbp2, Cxcl10, Slfn9, Samd9l, Samhd1, Tnfsf10, Rnf213, Phf11d 
##     Eif2ak2, Phf11b, Sp100, Ddx58, Zbp1, Stat2, Gbp5, Ddx60, Slfn8, Rtp4
length(VariableFeatures(GEX.seur))
## [1] 1246
head(VariableFeatures(GEX.seur),500)
##   [1] "Saa3"        "Ccl4"        "Cxcl10"      "Ccl3"        "Ccl5"       
##   [6] "Cd74"        "Marco"       "H2-Aa"       "H2-Ab1"      "Cxcl13"     
##  [11] "H2-Eb1"      "Apoe"        "Spp1"        "Lcn2"        "Ch25h"      
##  [16] "Ccl2"        "Cxcl9"       "Ccl7"        "Pf4"         "Ccl12"      
##  [21] "Rsad2"       "Il1b"        "Lyve1"       "Lyz2"        "Cd69"       
##  [26] "Lgals3"      "Apoc1"       "F13a1"       "S100a6"      "S100a4"     
##  [31] "Prc1"        "Stmn1"       "Fabp5"       "Cybb"        "Cd163"      
##  [36] "Pclaf"       "Iigp1"       "Ms4a7"       "Acod1"       "Il12b"      
##  [41] "Cp"          "Lpl"         "Slc13a3"     "Mrc1"        "Cd209f"     
##  [46] "Plac8"       "Cd72"        "Wfdc21"      "Egr3"        "Ear2"       
##  [51] "Tnf"         "Hp"          "Stmn3"       "Fn1"         "Crip1"      
##  [56] "Atp1b1"      "Napsa"       "Itga4"       "Meg3"        "Fpr1"       
##  [61] "Pcsk1n"      "Syt1"        "Ly6k"        "Fth1"        "Wfdc17"     
##  [66] "Gpr84"       "Lgals1"      "Sncb"        "Aldh1a3"     "Knl1"       
##  [71] "Adgre5"      "Ccl8"        "Ahnak"       "Stmn2"       "Cplx1"      
##  [76] "Ly6i"        "Pbk"         "Clec4d"      "Plbd1"       "Aspm"       
##  [81] "Ccr2"        "Ace"         "Cd55"        "Olfr889"     "Ms4a6c"     
##  [86] "Bex2"        "Atf3"        "Cxcl2"       "Caly"        "Esco2"      
##  [91] "Sparcl1"     "Ccr7"        "Nap1l5"      "Ptgds"       "Ly6h"       
##  [96] "Clec4e"      "H2afx"       "Snhg11"      "Gad2"        "Spn"        
## [101] "Arg1"        "Vim"         "Cd300e"      "Clec10a"     "Cd38"       
## [106] "Nrxn3"       "Ccr1"        "Mgl2"        "Ccnb1"       "Dennd5b"    
## [111] "Spc24"       "Cdkn1a"      "Alas2"       "C1qtnf4"     "Robo2"      
## [116] "Wdcp"        "Fxyd2"       "Kif15"       "Dqx1"        "Lockd"      
## [121] "Nrxn1"       "Ftl1"        "Dlx6os1"     "Cxcl14"      "Loxl2"      
## [126] "Tnfaip2"     "Rian"        "Smc2"        "Plk1"        "Ptges"      
## [131] "Itgal"       "Gpr141"      "Tceal5"      "Fcna"        "Gdf15"      
## [136] "Cd52"        "Cfp"         "Cd79a"       "Ccnd2"       "Sirpb1c"    
## [141] "Cxcl16"      "Plp1"        "Cbr2"        "H2afz"       "Apoc4"      
## [146] "Racgap1"     "Ccna2"       "Tspyl4"      "Treml4"      "Ndrg4"      
## [151] "Kcnk2"       "Nrgn"        "Igf1"        "Lilrb4a"     "Sox11"      
## [156] "Pla2g7"      "Alcam"       "Cyfip2"      "Mis18bp1"    "Oasl1"      
## [161] "Ndn"         "Pcdh7"       "Olfm1"       "Cd36"        "Gadd45b"    
## [166] "Mmp12"       "Slfn5"       "Tmsb10"      "Kcnk4"       "Sorbs2"     
## [171] "Tubb3"       "Tk1"         "Mt1"         "Fabp4"       "Tac1"       
## [176] "Il10"        "Slc6a1"      "Pbld1"       "Lrp1b"       "Gabrg2"     
## [181] "Scg2"        "Bst2"        "Nr4a1"       "Snca"        "Nfasc"      
## [186] "Timd4"       "Dab2"        "Adarb2"      "Arx"         "Epha5"      
## [191] "Cd63"        "Saa1"        "Msr1"        "Crybb1"      "Sgo2a"      
## [196] "Ifnb1"       "Gpm6a"       "Srgn"        "Bub1b"       "Nfib"       
## [201] "Clec4n"      "Kcnq1ot1"    "Snap25"      "Tlr2"        "C4b"        
## [206] "Dnm1"        "Dock5"       "Lhfpl4"      "Nsg1"        "Gng3"       
## [211] "Ms4a4c"      "Sct"         "Neil3"       "Rhox5"       "Pou4f1"     
## [216] "Cytip"       "Myt1l"       "Sphkap"      "Egr2"        "Fpr2"       
## [221] "Ier3"        "Cpd"         "Spc25"       "Gpnmb"       "Tro"        
## [226] "Peg3"        "Jaml"        "Cntfr"       "Gabrg3"      "Rab27b"     
## [231] "Cd83"        "Pcdh10"      "Elavl2"      "H2-Q7"       "Stmn4"      
## [236] "Csf1"        "Grm5"        "Nr2f1"       "Enah"        "Krt80"      
## [241] "Ccl6"        "Ly6a"        "Ms4a6d"      "Fcgr4"       "Gap43"      
## [246] "Shcbp1"      "Folr2"       "Melk"        "C3"          "Serp2"      
## [251] "Pcdh9"       "Gabbr2"      "Tshz2"       "Spib"        "Cspg4"      
## [256] "Bex1"        "Mir155hg"    "Cep55"       "Slc7a11"     "Pcdh17"     
## [261] "Slc30a10"    "Lgi1"        "Slc1a2"      "Dclk1"       "Ebf1"       
## [266] "H2afv"       "Scg5"        "Shank1"      "Runx3"       "Nefm"       
## [271] "Cdkn3"       "Oasl2"       "Tceal6"      "Sox2"        "Tceal3"     
## [276] "Apoc2"       "Fxyd5"       "Tox3"        "Cadps"       "Sgo1"       
## [281] "Vnn3"        "Usp18"       "Sod2"        "Cd9"         "Syn2"       
## [286] "Spock2"      "Fxyd6"       "Snrpn"       "Map1b"       "Kif4"       
## [291] "Cd28"        "Rnd3"        "Ndnf"        "Knstrn"      "Gria1"      
## [296] "Islr2"       "Lmnb1"       "Meis2"       "Mycn"        "Vcam1"      
## [301] "Gda"         "Plk2"        "Tspo"        "Kif20a"      "Neb"        
## [306] "Myof"        "Fam171b"     "Emp3"        "Ncam1"       "Serpinb8"   
## [311] "Ccnf"        "Apob"        "Spag5"       "H2-K1"       "Ncapg"      
## [316] "Sag"         "Kctd16"      "Clstn3"      "Pcp4"        "Gbp2"       
## [321] "Mt3"         "Incenp"      "Fcrla"       "Spaar"       "Cxadr"      
## [326] "Slc32a1"     "Clec4a1"     "Slamf7"      "Scn2a"       "Trim59"     
## [331] "Lmo3"        "Scrn1"       "Clec12a"     "Apod"        "Fbxo5"      
## [336] "Prkar2b"     "Cit"         "Ccdc136"     "Iqgap1"      "Nes"        
## [341] "Slfn1"       "Rab3b"       "Csdc2"       "Nxpe4"       "Auts2"      
## [346] "Diaph3"      "Gria2"       "Chl1"        "E2f7"        "Jsrp1"      
## [351] "Ccl9"        "Tmem26"      "Ramp1"       "Mcemp1"      "Crmp1"      
## [356] "Cpne9"       "Lig1"        "Ednrb"       "Uchl1"       "Mxd3"       
## [361] "Kif22"       "S100a8"      "Ntng1"       "Ezh2"        "Lrrc31"     
## [366] "Elmod1"      "Rnase2a"     "Igsf11"      "Retnla"      "Cst7"       
## [371] "Fabp3"       "Rcan2"       "Ptn"         "Parp14"      "Pglyrp1"    
## [376] "Lgals3bp"    "Ccdc34"      "Camkv"       "Ppp2r2b"     "Rmi2"       
## [381] "Brca1"       "Ptprcap"     "Ncald"       "Irf7"        "Gng13"      
## [386] "Rims1"       "Rph3a"       "Sst"         "Klhl33"      "Tmem130"    
## [391] "Cd44"        "Fgl2"        "Adgre4"      "Id2"         "Herpud1"    
## [396] "Bst1"        "Ocstamp"     "Ncapg2"      "Klf2"        "Slfn2"      
## [401] "Klra2"       "Id3"         "Cttnbp2"     "Dennd3"      "Ctsb"       
## [406] "Chchd10"     "Cfb"         "Crip2"       "Mif"         "Mpped1"     
## [411] "Bcl2a1d"     "Sdc4"        "Gnaz"        "Adgrl3"      "Resp18"     
## [416] "Tnr"         "Ctnna2"      "Adgrg5"      "Rab3c"       "Manf"       
## [421] "Gabra1"      "Nxph1"       "Igfbp4"      "Eef1a2"      "Lsp1"       
## [426] "Cdkn2c"      "Gad1"        "Sdf2l1"      "H2-D1"       "Aoah"       
## [431] "C3ar1"       "Npy"         "Grin1"       "Gria4"       "Cd24a"      
## [436] "Celf4"       "Zcchc18"     "Ctsc"        "Cmpk2"       "Rnf213"     
## [441] "Tubb5"       "Lamp5"       "Dct"         "Cd40"        "Nfkbia"     
## [446] "Marcksl1"    "Vgf"         "Cadm2"       "Ncl"         "Mir124a-1hg"
## [451] "Lrrk2"       "Shroom2"     "Pnmal2"      "Ttc9b"       "Tgfb2"      
## [456] "Cstb"        "Atp1a3"      "F10"         "Dcx"         "Troap"      
## [461] "Gpr65"       "Emilin2"     "Stil"        "Fau"         "Gfod2"      
## [466] "Mt2"         "Acp5"        "Col11a1"     "Tmem179"     "Diras2"     
## [471] "Myt1"        "Spock3"      "Gjc1"        "Sox10"       "Nap1l3"     
## [476] "Nnat"        "Brip1"       "Syde1"       "S1pr5"       "Dlgap1"     
## [481] "Astn1"       "Cdk19os"     "Trim9"       "Gucy1a1"     "Prr11"      
## [486] "Chgb"        "Timp1"       "Cenpm"       "Zbtb20"      "Tuba1c"     
## [491] "Opcml"       "Phf11d"      "Xkr4"        "Aox2"        "Cntnap5a"   
## [496] "Col5a1"      "Cdh4"        "Kcnq2"       "Bhlhe22"     "Kcnmb2"
grep("tdTomato|Tubb",VariableFeatures(GEX.seur),value = T)
## [1] "Tubb3"  "Tubb5"  "Tubb4a"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

(GEX.seur@reductions$pca@feature.loadings %>% as.data.frame)[,1:8] %>% arrange(desc(PC_1) ) %>% head(40)
##                PC_1          PC_2         PC_3         PC_4         PC_5
## P2ry12   0.13963867 -0.0151665921 -0.044348077 -0.088165335 -0.052882911
## Gpr34    0.12265663 -0.0175194051 -0.020513801 -0.084840794 -0.000778228
## Selenop  0.10234684 -0.0027557160  0.035357183  0.036060483 -0.057984484
## Hpgd     0.08685875 -0.0024085801  0.011260165 -0.004663422 -0.036008875
## Crybb1   0.08346912  0.0123951728  0.027188648  0.045963975 -0.072820113
## Ltc4s    0.08288665 -0.0082387554 -0.017316090  0.047941538 -0.048261295
## Maf      0.07700314 -0.0054165129  0.012514916 -0.060576486 -0.040837394
## Fau      0.07690935  0.0317404563  0.124958452  0.157314933 -0.084672320
## Ramp1    0.07187904  0.0342843580  0.066799140  0.058569539 -0.049594610
## Arsb     0.06507568  0.0234552345  0.023355566  0.045737381 -0.050045327
## Sox4     0.06453758 -0.0060489245  0.004684437 -0.040082131  0.013540583
## Cox7a2l  0.06330282  0.0196552562  0.066117665  0.078852651 -0.056798165
## Tpt1     0.05911969  0.0336006292  0.116269858  0.172497855 -0.073281173
## Lst1     0.05640582  0.0069852461  0.076876514  0.003410994 -0.040564862
## Cd9      0.05553437  0.0040860088  0.052044323 -0.067185587  0.100443281
## Rcbtb2   0.05524587  0.0046484785  0.033321154  0.006885790 -0.019655435
## Lrp1     0.05522428  0.0109483897  0.021312185  0.054350338 -0.027094116
## Ctsd     0.05509809 -0.0451100801 -0.056766308 -0.091891783  0.083188967
## Serpine2 0.05457313 -0.0004856086  0.001515587 -0.023747017  0.022221461
## Apoe     0.05044189  0.0312709092  0.149765039  0.139282768 -0.063505866
## Klhl24   0.04828092 -0.0060478308  0.021528748 -0.031998240 -0.002589549
## Sgk1     0.04811596 -0.0011396746  0.007320379 -0.040107614  0.018743558
## Slc12a2  0.04633511  0.0001807426  0.019318523 -0.007841940  0.015024745
## Igfbp4   0.04588320  0.0382343049  0.076252840  0.078008216 -0.062837133
## Gnas     0.04204847  0.0290834538  0.057573012  0.076360861 -0.007718047
## Lsp1     0.04089682  0.0159459054  0.058174690  0.064991655 -0.029279458
## Npnt     0.03891465  0.0161036097  0.045446558  0.036072756 -0.034228609
## Cbfa2t3  0.03727862  0.0062232497  0.005474586 -0.003983893 -0.018886924
## Ccl6     0.03566747  0.0003228454  0.027035248 -0.003979250  0.026948735
## Hmgb1    0.03507957  0.0477862186 -0.001018773  0.049586910 -0.010756468
## Cryba4   0.03493302  0.0134561014  0.027482891  0.036038966 -0.028539448
## Ttc28    0.03392395  0.0002366513 -0.002416500 -0.038650375  0.001974035
## Ddit4    0.03265884 -0.0134297868 -0.013606092 -0.054221173  0.001645685
## Scd2     0.03213781  0.0179404123  0.036088020  0.041782247  0.023910073
## Hacd4    0.03153229  0.0121728764  0.033684215  0.013635499 -0.031098803
## Il7r     0.03135156 -0.0081452293 -0.023792791 -0.078591728 -0.012667362
## Zbtb20   0.02887074 -0.0056922132 -0.012326484 -0.060718541 -0.007390124
## H2afv    0.02855195  0.0716299678  0.016617826  0.016920232 -0.020729769
## Rgs7bp   0.02664408  0.0063471202 -0.001524990  0.017837057 -0.017807206
## Pmp22    0.02656317 -0.0183026275 -0.036913248 -0.096494894  0.091162032
##                   PC_6          PC_7         PC_8
## P2ry12    0.0412088349  0.0458704663  0.034964057
## Gpr34    -0.0268512420  0.0701324437  0.022739045
## Selenop  -0.0009875476  0.0484488244 -0.030883158
## Hpgd      0.0366368155  0.0229690257  0.047185843
## Crybb1    0.0673803726  0.0438080497 -0.033079042
## Ltc4s     0.0614497776  0.0111141404 -0.005257964
## Maf       0.0743906136  0.0289044586  0.061958655
## Fau       0.0716363427 -0.0245655704 -0.119193444
## Ramp1     0.0651011982  0.0237478413 -0.024422704
## Arsb      0.0648907450  0.0373091497  0.009234859
## Sox4      0.0109584576  0.0622589780  0.044286390
## Cox7a2l   0.0582253756 -0.0010899477 -0.032181773
## Tpt1      0.0816385936  0.0011821777 -0.092141353
## Lst1      0.0109179654 -0.0344413657 -0.021893584
## Cd9      -0.1249452109  0.1538975630 -0.015283657
## Rcbtb2    0.0094302697  0.0370442682  0.013499504
## Lrp1      0.0449072717  0.0614508905  0.028313760
## Ctsd     -0.2143143499  0.1512430750 -0.027253601
## Serpine2 -0.0363405991  0.1250061609  0.009947119
## Apoe     -0.0347302876  0.0195746203 -0.108951617
## Klhl24   -0.0161091415  0.0256174099  0.020445996
## Sgk1      0.0002766317  0.0687721238  0.027306013
## Slc12a2  -0.0036760423  0.0871534544  0.032165177
## Igfbp4    0.1099909461 -0.0132678773 -0.017177141
## Gnas      0.0076375901  0.0724391138 -0.018235768
## Lsp1      0.0230942748 -0.0118368613  0.044483462
## Npnt     -0.0038484162  0.0616902097 -0.017351571
## Cbfa2t3   0.0080058887  0.0020442363  0.051513612
## Ccl6     -0.0453825411  0.0732716098  0.019854849
## Hmgb1     0.0428546504  0.0153216749  0.002099347
## Cryba4    0.0173667948  0.0142313513 -0.031093906
## Ttc28    -0.0199351551  0.0380126969  0.030979477
## Ddit4    -0.0210400204 -0.0180619949  0.056302732
## Scd2     -0.0340697294  0.0889797347  0.012203239
## Hacd4    -0.0240167754 -0.0124958338 -0.009701733
## Il7r     -0.0268268095 -0.0079792611  0.019259148
## Zbtb20   -0.0232450180  0.0090133408  0.047387326
## H2afv     0.0054896704 -0.0007457628 -0.021536287
## Rgs7bp    0.0413317843  0.0152016141  0.025736521
## Pmp22    -0.1062348557  0.0855533869  0.063192106
(GEX.seur@reductions$pca@feature.loadings %>% as.data.frame)[,1:8] %>% arrange(PC_1 ) %>% head(40)
##                 PC_1          PC_2          PC_3          PC_4         PC_5
## Ccl12    -0.15446638 -0.0158503275 -0.0588233666  0.0295314158 -0.011739151
## Bst2     -0.12534839 -0.0081415338 -0.0196235812  0.0369619237 -0.039467922
## Ccl2     -0.12039879 -0.0016493436  0.0406162805 -0.0393664732 -0.003648507
## Ms4a6c   -0.11931740 -0.0078349421  0.0310257853 -0.0203518004  0.004116030
## Ctsc     -0.11816853 -0.0034266426 -0.0320782682  0.0170869825  0.032181860
## Srgn     -0.11657303 -0.0165066876 -0.0625787846  0.0196647368  0.037581949
## Lgals3bp -0.11379177 -0.0058433098 -0.0383521323  0.0522222511 -0.047924923
## Fth1     -0.11365660 -0.0033029542  0.0634404145  0.0397156258  0.055217752
## Slfn2    -0.11278163 -0.0006462411 -0.0549693928  0.0575720546 -0.066714025
## Gpr84    -0.11168549 -0.0076226358  0.0052118037 -0.0181664211  0.059419905
## Ly6e     -0.10906247 -0.0136540979 -0.1034944683 -0.0188370585 -0.033439332
## Fcgr4    -0.10725543 -0.0078056200  0.0026389252  0.0164146144 -0.033106970
## Calr     -0.10720258 -0.0217903503 -0.0777182249  0.0080851765  0.052418593
## Tspo     -0.10689223  0.0018461754 -0.0294770077  0.0818021379 -0.047002189
## Ms4a6d   -0.10112254 -0.0160317169 -0.0043939497 -0.0214253741  0.016658230
## Sdf2l1   -0.09961599 -0.0109629861 -0.0735564732  0.0218046813  0.041401614
## Rtp4     -0.09809984 -0.0082204374 -0.0444918804  0.0379765343 -0.086112472
## Fcgr1    -0.09790103 -0.0189882896 -0.0728495569  0.0061477004 -0.055806844
## Manf     -0.09751905 -0.0104339995 -0.0723708519  0.0014918058  0.048614194
## Slfn5    -0.09236343 -0.0100027406 -0.0042728412  0.0120944080 -0.150094284
## C5ar1    -0.09153165 -0.0139396428 -0.0554405074  0.0174886010  0.068982775
## Pdia6    -0.09127803 -0.0181985124 -0.0731231376 -0.0161695785  0.054441704
## Pdia3    -0.09110777 -0.0117310371 -0.0651608517  0.0105781385  0.032771077
## Cxcl16   -0.09036727  0.0016501886  0.0725821121 -0.0456715357  0.058030016
## Trim30a  -0.08838279 -0.0105366886 -0.0599630574  0.0170913138 -0.085985713
## Cybb     -0.08682761  0.0099316789  0.1652140654 -0.0756287750  0.013717072
## Nme2     -0.08450327  0.0127541838 -0.0153884794  0.1252146985  0.023375394
## Naaa     -0.08437778 -0.0012162095 -0.0537039219  0.0591740541 -0.011772317
## Cebpb    -0.08384438 -0.0068402771 -0.0478786355  0.0137445942  0.022448154
## Ctsz     -0.08256648 -0.0127875206  0.0207912854 -0.0070175249  0.105158361
## Oasl2    -0.08251954 -0.0014700651  0.0096932981 -0.0076264209 -0.163888085
## Cd72     -0.08100939  0.0118933400  0.1163999540 -0.0432713068  0.028526744
## Msr1     -0.07822053  0.0012227358  0.0446079282 -0.0186756917  0.060216818
## Zbp1     -0.07776037 -0.0017223507  0.0086907953  0.0106850104 -0.094801703
## Clec2d   -0.07751063 -0.0019040527  0.0379318128 -0.0192080009 -0.052900115
## Rnf213   -0.07708476 -0.0018748939 -0.0112569913 -0.0021092469 -0.103296645
## Ccdc86   -0.07660991 -0.0061789807 -0.0272841601  0.0354812171 -0.004657440
## Phf11b   -0.07534707 -0.0035339514  0.0068151731  0.0001443174 -0.099372545
## Tor3a    -0.07517982 -0.0059225119 -0.0005383912 -0.0137626600 -0.077141209
## Parp14   -0.07500697 -0.0061275940 -0.0158310527 -0.0074743551 -0.118343805
##                  PC_6         PC_7          PC_8
## Ccl12     0.043699617 -0.031501234 -3.842063e-02
## Bst2     -0.037309172  0.004444719 -4.800133e-02
## Ccl2      0.071376411  0.037180746 -2.495023e-02
## Ms4a6c   -0.001108465 -0.070231364  7.898666e-05
## Ctsc     -0.024147452 -0.034698775 -3.388415e-02
## Srgn     -0.052340880 -0.023946955 -1.793162e-02
## Lgals3bp -0.037132779  0.004677086 -3.976443e-02
## Fth1     -0.086636077 -0.030555806 -1.033657e-01
## Slfn2     0.021435847 -0.020071070  2.539096e-02
## Gpr84     0.070512047  0.023647693 -3.846352e-02
## Ly6e     -0.085180646 -0.050034412 -2.126753e-02
## Fcgr4    -0.019083039 -0.041639276 -3.011497e-02
## Calr     -0.012856411  0.011670336 -3.283525e-03
## Tspo     -0.040940301 -0.034622714 -5.299911e-02
## Ms4a6d   -0.008645044 -0.051289767 -1.808455e-02
## Sdf2l1   -0.040426618 -0.012527462 -2.894583e-02
## Rtp4     -0.003922611  0.010143696  3.015369e-03
## Fcgr1     0.007675670 -0.025504570 -2.448855e-02
## Manf     -0.015984920 -0.011302801 -2.120685e-02
## Slfn5    -0.009208678  0.056339860  3.651517e-02
## C5ar1     0.019841016 -0.008585329  3.395183e-03
## Pdia6    -0.026915462 -0.003798765 -3.897175e-03
## Pdia3    -0.009820449  0.014483175 -4.913796e-03
## Cxcl16    0.004665190  0.027679396 -6.260948e-02
## Trim30a  -0.008381039  0.008471138  3.072463e-02
## Cybb      0.006516005 -0.060463991  2.810783e-02
## Nme2      0.010561063 -0.004088738 -6.004136e-02
## Naaa      0.025167181 -0.041193855 -2.657304e-02
## Cebpb     0.008655164 -0.029553032  2.609040e-02
## Ctsz     -0.118321723  0.063635046 -9.301187e-02
## Oasl2    -0.026919588  0.051020963  2.167516e-02
## Cd72      0.039821621 -0.048075515 -7.359021e-02
## Msr1      0.059268480 -0.037818571  1.029545e-02
## Zbp1     -0.037700260  0.011818868  9.254766e-04
## Clec2d    0.030695455 -0.004692081  1.185929e-03
## Rnf213   -0.011075220  0.045557063  3.773417e-02
## Ccdc86    0.016331987  0.016480874  1.153678e-02
## Phf11b   -0.015325615  0.040628561  1.389780e-02
## Tor3a    -0.006618078  0.046538763  3.029327e-03
## Parp14    0.007106489  0.053315911  5.047986e-02
decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:18
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 23829
## Number of edges: 650085
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7332
## Number of communities: 22
## Elapsed time: 5 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 50, seed.use = 283)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:50:16 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:50:16 Read 23829 rows and found 18 numeric columns
## 20:50:16 Using Annoy for neighbor search, n_neighbors = 50
## 20:50:16 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:50:18 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\Rtmps9LPBJ\filec1744ab4bb6
## 20:50:18 Searching Annoy index using 1 thread, search_k = 5000
## 20:50:32 Annoy recall = 100%
## 20:50:32 Commencing smooth kNN distance calibration using 1 thread
## 20:50:35 Initializing from normalized Laplacian + noise
## 20:50:36 Commencing optimization for 200 epochs, with 1749622 positive edges
## 20:51:09 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "FB.info", split.by = "FB.info", 
        ncol = 5, cols = color.FB)

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 5, cols = color.FB)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB)

check some markers

markers.mig <- c("Ptprc",#"Cd3d","Cd3e","Cd19",
                 "Cd74","Lyz2","Ccl4",
                 "Aif1","P2ry12","C1qa","Spp1",
                 "Top2a","Pcna","Mki67","Mcm6",
                 "Cx3cr1","Il4ra","Il13ra1","Fabp5",
                 "Hmox1","Ms4a7","Pf4","Tubb3",
                 "tdTomato-head","tdTomato-mid","tdTomato-tail","CreERT2"
                 )
FeaturePlot(GEX.seur, 
            features = markers.mig,
            ncol = 4)

DotPlot(GEX.seur, features = markers.mig, group.by = "FB.info",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)

VlnPlot(GEX.seur, features = c("tdTomato-head","tdTomato-mid","tdTomato-tail","CreERT2"),
        group.by = "FB.info", ncol = 2)

DotPlot(GEX.seur, features = markers.mig, group.by = "seurat_clusters",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(2,0,17,12,15,
                                            6,5,10,7,14,16,18,
                                            9,1,4,3,8,11,13,
                                            21,20,19))
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$sort_clusters)[c(3:12),],
                   main = "Cell Count",
                   gaps_row = c(3,7),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$sort_clusters)[c(3:12),]),
                   main = "Cell Ratio",
                   gaps_row = c(3,7),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"

#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, 
#                                  min.pct = 0.05,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.25)
GEX.markers.pre <- read.table("sc10x_LYN.marker.sort1006.csv", header = TRUE, sep = ",")
GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
## # A tibble: 176 x 7
## # Groups:   cluster [22]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
##        <dbl>      <dbl> <dbl> <dbl>     <dbl>   <int> <chr>  
##  1 0              0.890 0.997 0.939 0               2 Gpr34  
##  2 0              0.835 1     0.979 0               2 P2ry12 
##  3 0              0.822 0.979 0.904 0               2 Tgfbr1 
##  4 0              0.754 0.956 0.866 0               2 Arhgap5
##  5 6.29e-321      0.732 0.933 0.784 1.12e-316       2 Rhob   
##  6 6.42e-284      0.894 0.673 0.355 1.15e-279       2 Pmp22  
##  7 4.31e-269      0.795 0.74  0.522 7.72e-265       2 Fam102b
##  8 6.28e-202      0.709 0.577 0.339 1.12e-197       2 Havcr2 
##  9 0              0.845 0.993 0.938 0               0 Gpr34  
## 10 0              0.789 1     1     0               0 Cst3   
## # ... with 166 more rows
GEX.markers.sort <- GEX.markers.pre
GEX.markers.sort$cluster <- factor(as.character(GEX.markers.sort$cluster),
                          levels = levels(GEX.seur$sort_clusters))


markers.pre_t48 <- (GEX.markers.sort %>% group_by(cluster) %>% 
                  filter(pct.1>0.1) %>%
                   top_n(n = 48, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t60 <- (GEX.markers.sort %>% group_by(cluster) %>% 
                  filter(pct.1>0.1 & gene %in% grep("Rps|Rpl",GEX.markers.sort$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t120 <- (GEX.markers.sort %>% group_by(cluster) %>% 
                  filter(pct.1>0.1 & gene %in% grep("Rps|Rpl",GEX.markers.sort$gene,invert = T,value = T)) %>%
                   top_n(n = 120, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[1:64]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[65:128]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[129:192]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[193:256]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[257:320]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[321:384]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[385:448]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[449:512]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[513:576]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[577:640]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[641:704]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, group.by = "sort_clusters", features = rev(markers.pre_t60[705:772]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

check.ref1 <- c("Tmem119","Hexb","Slc2a5","P2ry12",
                "Siglech","Trem2",  # microglia
                "Mrc1","Lyve1","Cd163","Siglec1",  # CAM
                "Ly6c2","Ccr2","Plac8","Anxa8",
                "Nr4a1",  # Monocyte-derived
                "Flt3","Zbtb46","Batf3","Itgae",
                "Clec9a",  # 
                "Tubb3","Actl6b" # locally addede neuron markers
                )

DotPlot(GEX.seur, features = check.ref1, group.by = "sort_clusters",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: Anxa8

markers.Sci2019 <- c("Tmem119","Hexb","Slc2a5","P2ry12","Siglech","Trem2","Sparc","Olfml3",  # Microglia
                     "Mrc1","Lyve1","Cd163","Siglec1","Pf4","Ms4a7","Cbr2",  # CAMs
                     "Ly6c2","Ccr2","Plac8","Anxa8","Nr4a1","Mertk","Zbtb26","Cd209a",  # Monocyte-derived
                     "Flt3","Zbtb46","Batf3","Itgae","Clec9a",  # DCs
                     "Tubb3"  # local added
                     )
FeaturePlot(GEX.seur, 
            features = markers.Sci2019,
            ncol = 4)
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The
## following requested variables were not found: Anxa8

check.Cell2017 <- c("Hexb","Cst3","P2ry12","Cx3cr1",
                    "Cd9","Ctsd","Cst7","Lpl",
                    "Mrc1","Cd163","S100a4","Cd74",
                    "Cd79b","Rag1","Trbc2","Nkg7",
                    "S100a9","Camp"
                    )

DotPlot(GEX.seur, features = check.Cell2017, group.by = "sort_clusters",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: Rag1, Nkg7, S100a9, Camp

DoubletFinder

library(DoubletFinder)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR

## NULL
## [1] "Creating 7943 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 7943 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters", split.by = "DoubletFinder0.05")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters", split.by = "DoubletFinder0.1")

table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.05")])
##              DoubletFinder0.05
## sort_clusters Doublet Singlet
##            2      120    2404
##            0       20    3251
##            17       0     116
##            12      12     356
##            15      18     191
##            6       88    1797
##            5       62    1973
##            10     124     735
##            7       75    1508
##            14      28     191
##            16      45     150
##            18      23      74
##            9       81     858
##            1       60    2539
##            4       77    1965
##            3      205    2241
##            8       81    1461
##            11      32     476
##            13      30     282
##            21       2      24
##            20       6      21
##            19       2      25
table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.1")])
##              DoubletFinder0.1
## sort_clusters Doublet Singlet
##            2      177    2347
##            0       34    3237
##            17       0     116
##            12      23     345
##            15      29     180
##            6      156    1729
##            5      126    1909
##            10     226     633
##            7      154    1429
##            14     120      99
##            16     192       3
##            18      50      47
##            9      152     787
##            1      111    2488
##            4      138    1904
##            3      368    2078
##            8      141    1401
##            11      74     434
##            13      92     220
##            21       4      22
##            20      10      17
##            19       6      21
#
GEX.seur$age <- as.character(GEX.seur$FB.info)

GEX.seur$age[GEX.seur$age %in% c("P06.TDT","P06.MIG","P06.CTR")] <- "P06"
GEX.seur$age[GEX.seur$age %in% c("P30.PBS.TDT","P30.PBS.MIG","P30.PBS.CTR")] <- "P30.PBS"
GEX.seur$age[GEX.seur$age %in% c("P30.LPS.TDT1","P30.LPS.TDT2","P30.LPS.MIG","P30.LPS.CTR")] <- "P30.LPS"

GEX.seur$age <- factor(GEX.seur$age, 
                       levels = c("P06","P30.PBS","P30.LPS"))

#
GEX.seur$cnt <- as.character(GEX.seur$FB.info)

GEX.seur$cnt[GEX.seur$cnt %in% c("P06.CTR","P30.PBS.CTR","P30.LPS.CTR")] <- "CTR"
GEX.seur$cnt[GEX.seur$cnt %in% c("P06.MIG","P30.PBS.MIG","P30.LPS.MIG")] <- "MIG"
GEX.seur$cnt[GEX.seur$cnt %in% c("P06.TDT","P30.PBS.TDT","P30.LPS.TDT1","P30.LPS.TDT2")] <- "TDT"

GEX.seur$cnt <- factor(GEX.seur$cnt,
                       levels = c("CTR","MIG","TDT"))

preAnno

C15: Monocyte-derived (Cd74 hi, MHCII hi)
C21: cDC(/Monocyte)
C20: BAM (Brain Associated Macrophage)
C19: Neuron (should also be mixed with immune)
C17: lowUMI to exclude

GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno[GEX.seur$preAnno %in% c(2,0,12,
                                         6,5,10,7,14,16,18,
                                         9,1,3,4,8,11,13)] <- "Microglia"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(17)] <- "MIG.lowUMI"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(15)] <- "Mono"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(21)] <- "cDC"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(20)] <- "BAM"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(19)] <- "Neuron"

GEX.seur$preAnno <- factor(GEX.seur$preAnno,
                           levels = c("Microglia","MIG.lowUMI",
                                      "Mono","cDC","BAM","Neuron"
                                      ))
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "preAnno")

#saveRDS(GEX.seur, "Spp1tdt.GEX.seur.sort1006.rds")

DEGs

Microglia quickly do

  1. CTR vs TDT in P06/P30.PBS/P30.LPS

  2. PBS vs LPS in P30.CTR/MIG/TDT

GEX.seur
## An object of class Seurat 
## 17900 features across 23829 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
GEX.comp <- subset(GEX.seur, subset= preAnno %in% c("Microglia"))
GEX.comp
## An object of class Seurat 
## 17900 features across 23424 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
head(GEX.comp)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGGTCGACA-1    Spp1tdt       5001         2062   2.699460  12.437512
## AAACCCAAGGTTCTTG-1    Spp1tdt       6501         2457   2.553453  12.305799
## AAACCCAAGTCGGCCT-1    Spp1tdt       5529         2163   5.498282  17.887502
## AAACCCAAGTCTGCGC-1    Spp1tdt       8466         2389   3.874321  26.836759
## AAACCCACAAACTCGT-1    Spp1tdt       4188         1841   2.960840   8.572111
## AAACCCACACGCGGTT-1    Spp1tdt       4524         1913   1.215738   9.858532
## AAACCCACAGTAGAAT-1    Spp1tdt       6855         2585   2.727936  14.529540
## AAACCCACATCCGTGG-1    Spp1tdt       2337         1225   3.979461   7.659392
## AAACCCAGTATCGAAA-1    Spp1tdt       4318         1955   2.292728  11.741547
## AAACCCAGTGCCAAGA-1    Spp1tdt       7306         2552   2.929099  12.565015
##                         FB.info       S.Score    G2M.Score Phase
## AAACCCAAGGTCGACA-1  P30.LPS.CTR  0.0061184939 -0.065233665     S
## AAACCCAAGGTTCTTG-1 P30.LPS.TDT1  0.0002076412 -0.071026272     S
## AAACCCAAGTCGGCCT-1      P06.TDT -0.0493217054 -0.114069796    G1
## AAACCCAAGTCTGCGC-1      P06.TDT -0.0278100775 -0.083520479    G1
## AAACCCACAAACTCGT-1  P30.PBS.MIG  0.0192137320  0.060314405   G2M
## AAACCCACACGCGGTT-1 P30.LPS.TDT2 -0.0535299003 -0.054646562    G1
## AAACCCACAGTAGAAT-1      P06.CTR  0.4575858250  0.005436139     S
## AAACCCACATCCGTGG-1  P30.PBS.TDT  0.0489756368 -0.042116708     S
## AAACCCAGTATCGAAA-1  P30.LPS.MIG -0.0314645626  0.021833672   G2M
## AAACCCAGTGCCAAGA-1      P06.TDT -0.0546373200 -0.081613375    G1
##                    RNA_snn_res.1.5 seurat_clusters sort_clusters
## AAACCCAAGGTCGACA-1               4               4             4
## AAACCCAAGGTTCTTG-1               3               3             3
## AAACCCAAGTCGGCCT-1               7               7             7
## AAACCCAAGTCTGCGC-1               7               7             7
## AAACCCACAAACTCGT-1               0               0             0
## AAACCCACACGCGGTT-1               4               4             4
## AAACCCACAGTAGAAT-1               5               5             5
## AAACCCACATCCGTGG-1               0               0             0
## AAACCCAGTATCGAAA-1               1               1             1
## AAACCCAGTGCCAAGA-1              10              10            10
##                    pANN_0.25_0.005_1191 DoubletFinder0.05 pANN_0.25_0.005_2383
## AAACCCAAGGTCGACA-1          0.000000000           Singlet          0.000000000
## AAACCCAAGGTTCTTG-1          0.044025157           Singlet          0.044025157
## AAACCCAAGTCGGCCT-1          0.106918239           Singlet          0.113207547
## AAACCCAAGTCTGCGC-1          0.044025157           Singlet          0.044025157
## AAACCCACAAACTCGT-1          0.006289308           Singlet          0.006289308
## AAACCCACACGCGGTT-1          0.012578616           Singlet          0.012578616
## AAACCCACAGTAGAAT-1          0.383647799           Singlet          0.364779874
## AAACCCACATCCGTGG-1          0.000000000           Singlet          0.000000000
## AAACCCAGTATCGAAA-1          0.006289308           Singlet          0.006289308
## AAACCCAGTGCCAAGA-1          0.421383648           Doublet          0.427672956
##                    DoubletFinder0.1     age cnt   preAnno
## AAACCCAAGGTCGACA-1          Singlet P30.LPS CTR Microglia
## AAACCCAAGGTTCTTG-1          Singlet P30.LPS TDT Microglia
## AAACCCAAGTCGGCCT-1          Singlet     P06 TDT Microglia
## AAACCCAAGTCTGCGC-1          Singlet     P06 TDT Microglia
## AAACCCACAAACTCGT-1          Singlet P30.PBS MIG Microglia
## AAACCCACACGCGGTT-1          Singlet P30.LPS TDT Microglia
## AAACCCACAGTAGAAT-1          Doublet     P06 CTR Microglia
## AAACCCACATCCGTGG-1          Singlet P30.PBS TDT Microglia
## AAACCCAGTATCGAAA-1          Singlet P30.LPS MIG Microglia
## AAACCCAGTGCCAAGA-1          Doublet     P06 TDT Microglia
head(GEX.comp$age)
## AAACCCAAGGTCGACA-1 AAACCCAAGGTTCTTG-1 AAACCCAAGTCGGCCT-1 AAACCCAAGTCTGCGC-1 
##            P30.LPS            P30.LPS                P06                P06 
## AAACCCACAAACTCGT-1 AAACCCACACGCGGTT-1 
##            P30.PBS            P30.LPS 
## Levels: P06 P30.PBS P30.LPS
head(GEX.comp$cnt)
## AAACCCAAGGTCGACA-1 AAACCCAAGGTTCTTG-1 AAACCCAAGTCGGCCT-1 AAACCCAAGTCTGCGC-1 
##                CTR                TDT                TDT                TDT 
## AAACCCACAAACTCGT-1 AAACCCACACGCGGTT-1 
##                MIG                TDT 
## Levels: CTR MIG TDT

comp.a

Idents(GEX.comp) <- "cnt"
DEGs.a <- list()

for(nn in levels(GEX.comp$age)){
  
  DEGs.a[[nn]] <- FindAllMarkers(subset(GEX.comp, subset = cnt %in% c("CTR","TDT") & age %in% c(nn)),
                                 assay = "RNA",
                                 only.pos = T,
                                 min.pct = 0.05,
                                 logfc.threshold = 0.1,
                                 test.use = "MAST")
  
  
}
## Calculating cluster CTR
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster TDT
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTR
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster TDT
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster CTR
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster TDT
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
#DEGs.a
names(DEGs.a)
## [1] "P06"     "P30.PBS" "P30.LPS"
# save DEGs' table
df.DEGs.a <- data.frame()

for(nn in names(DEGs.a)){
  df.DEGs.a <- rbind(df.DEGs.a, data.frame(DEGs.a[[nn]], age = nn))
}

#write.csv(df.DEGs.a, "DEGs.a.CTRvsTDT_in_ages.csv")

comp.b

Idents(GEX.comp) <- "age"
list.b <- list(P30.CTR = c("P30.PBS.CTR","P30.LPS.CTR"),
               P30.MIG = c("P30.PBS.MIG","P30.LPS.MIG"),
               P30.TDT = c("P30.PBS.TDT","P30.LPS.TDT1","P30.LPS.TDT2"))
list.b
## $P30.CTR
## [1] "P30.PBS.CTR" "P30.LPS.CTR"
## 
## $P30.MIG
## [1] "P30.PBS.MIG" "P30.LPS.MIG"
## 
## $P30.TDT
## [1] "P30.PBS.TDT"  "P30.LPS.TDT1" "P30.LPS.TDT2"
names(list.b)
## [1] "P30.CTR" "P30.MIG" "P30.TDT"
DEGs.b <- list()

for(nn in names(list.b)){
  
  DEGs.b[[nn]] <- FindAllMarkers(subset(GEX.comp, subset = age %in% c("P30.PBS","P30.LPS") & FB.info %in% list.b[[nn]]),
                                 assay = "RNA",
                                 only.pos = T,
                                 min.pct = 0.05,
                                 logfc.threshold = 0.1,
                                 test.use = "MAST")
  
  
}
## Calculating cluster P30.PBS
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.LPS
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.PBS
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.LPS
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.PBS
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## Calculating cluster P30.LPS
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
#DEGs.b
names(DEGs.b)
## [1] "P30.CTR" "P30.MIG" "P30.TDT"
# save DEGs' table
df.DEGs.b <- data.frame()

for(nn in names(DEGs.b)){
  df.DEGs.b <- rbind(df.DEGs.b, data.frame(DEGs.b[[nn]], condition = nn))
}

#write.csv(df.DEGs.b, "DEGs.b.PBSvsLPS_in_P30_conditions.csv")

DEG plot

plot.a

pp.DEGs.a <- lapply(DEGs.a, function(x){NA})
names(DEGs.a)
## [1] "P06"     "P30.PBS" "P30.LPS"
DEGs.a.combine <- DEGs.a
DEGs.a.combine <- lapply(DEGs.a.combine, function(x){
  x[x$cluster=="CTR","avg_log2FC"] <- -x[x$cluster=="CTR","avg_log2FC"]
  x
})
P06
pp.DEGs.a$P06 <- DEGs.a.combine$P06 %>%
  mutate(label=ifelse(((p_val_adj < 1e-8 & avg_log2FC<0) | (p_val_adj < 1e-8 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTR",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTR"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P06, CTR vs TDT") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,2))
pp.DEGs.a$P06

P30.PBS
pp.DEGs.a$P30.PBS <- DEGs.a.combine$P30.PBS %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTR",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTR"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P30.PBS, CTR vs TDT") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")#+ xlim(c(-1,2))
pp.DEGs.a$P30.PBS

P30.LPS
pp.DEGs.a$P30.LPS <- DEGs.a.combine$P30.LPS %>%
  mutate(label=ifelse(((p_val_adj < 1e-7 & avg_log2FC<0) | (p_val_adj < 1e-7 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTR",ifelse(p_val_adj<0.05 & avg_log2FC>0,"TDT","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTR"="Skyblue",
                               "TDT"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="P30.LPS, CTR vs TDT") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") #+ xlim(c(-1,2))
pp.DEGs.a$P30.LPS

stat
cut.padj = 0.05
cut.log2FC = 0.3   
  
cut.pct1 = 0.1

df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(age,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
##       age cluster up.DEGs
## 1     P06     CTR      16
## 2     P06     TDT      17
## 3 P30.LPS     CTR       7
## 4 P30.LPS     TDT      23
## 5 P30.PBS     CTR       2
## 6 P30.PBS     TDT       6
pp.stat.DEG <- list()
pp.stat.DEG[["a"]] <- df.DEGs.a %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(age,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=age, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.1, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Number of DEGs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))
## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.
pp.stat.DEG[["a"]]

plot.b

pp.DEGs.b <- lapply(DEGs.b, function(x){NA})
names(DEGs.b)
## [1] "P30.CTR" "P30.MIG" "P30.TDT"
DEGs.b.combine <- DEGs.b
DEGs.b.combine <- lapply(DEGs.b.combine, function(x){
  x[x$cluster=="P30.PBS","avg_log2FC"] <- -x[x$cluster=="P30.PBS","avg_log2FC"]
  x
})
CTR
pp.DEGs.b$P30.CTR <- DEGs.b.combine$P30.CTR %>%
  mutate(label=ifelse(((p_val_adj < 1e-80 & avg_log2FC<0) | (p_val_adj < 1e-80 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"P30.PBS",ifelse(p_val_adj<0.05 & avg_log2FC>0,"P30.LPS","None")),
         padj=ifelse(p_val_adj<1e-300,p_val_adj+1e-300,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("P30.PBS"="Skyblue",
                               "P30.LPS"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="CTR, P30.PBS vs P30.LPS") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,2))
pp.DEGs.b$P30.CTR

MIG
pp.DEGs.b$P30.MIG <- DEGs.b.combine$P30.MIG %>%
  mutate(label=ifelse(((p_val_adj < 1e-80 & avg_log2FC<0) | (p_val_adj < 1e-80 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"P30.PBS",ifelse(p_val_adj<0.05 & avg_log2FC>0,"P30.LPS","None")),
         padj=ifelse(p_val_adj<1e-300,p_val_adj+1e-300,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("P30.PBS"="Skyblue",
                               "P30.LPS"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="MIG, P30.PBS vs P30.LPS") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,2))
pp.DEGs.b$P30.MIG

TDT
pp.DEGs.b$P30.TDT <- DEGs.b.combine$P30.TDT %>%
  mutate(label=ifelse(((p_val_adj < 1e-80 & avg_log2FC<0) | (p_val_adj < 1e-80 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)) &
                        !grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"P30.PBS",ifelse(p_val_adj<0.05 & avg_log2FC>0,"P30.LPS","None")),
         padj=ifelse(p_val_adj<1e-300,p_val_adj+1e-300,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("P30.PBS"="Skyblue",
                               "P30.LPS"="pink",
                               "None"="grey")) +
  theme_classic() + labs(title="TDT, P30.PBS vs P30.LPS") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted") + xlim(c(-1,2))
pp.DEGs.b$P30.TDT

stat
cut.padj = 0.05
cut.log2FC = 0.3   
  
cut.pct1 = 0.1

df.DEGs.b %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(condition,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
##   condition cluster up.DEGs
## 1   P30.CTR P30.PBS     417
## 2   P30.CTR P30.LPS     500
## 3   P30.MIG P30.PBS     309
## 4   P30.MIG P30.LPS     381
## 5   P30.TDT P30.PBS     392
## 6   P30.TDT P30.LPS     422
pp.stat.DEG <- list()
pp.stat.DEG[["b"]] <- df.DEGs.b %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(condition,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=condition, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title=paste0("up.DEGs stat, pct.1>0.1, padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Number of DEGs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))
## `summarise()` has grouped output by 'condition'. You can override using the
## `.groups` argument.
pp.stat.DEG[["b"]]

check1017

GEX.seur <- readRDS("Spp1tdt.GEX.seur.sort1006.rds")
GEX.seur
## An object of class Seurat 
## 17900 features across 23829 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
VlnPlot(GEX.seur, features = c("tdTomato-head","tdTomato-mid","tdTomato-tail","CreERT2"),
        group.by = "FB.info", ncol = 2)

GEX.clean <- subset(GEX.seur, subset= preAnno %in% c("Microglia") & DoubletFinder0.05 == "Singlet")
GEX.clean
## An object of class Seurat 
## 17900 features across 22261 samples within 1 assay 
## Active assay: RNA (17900 features, 1246 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
#GEX.clean@meta.data
head(GEX.seur$cnt)
## AAACCCAAGGTCGACA-1 AAACCCAAGGTTCTTG-1 AAACCCAAGTCGGCCT-1 AAACCCAAGTCTGCGC-1 
##                CTR                TDT                TDT                TDT 
## AAACCCAAGTTTGGCT-1 AAACCCACAAACTCGT-1 
##                TDT                MIG 
## Levels: CTR MIG TDT
head(GEX.seur$FB.info)
## AAACCCAAGGTCGACA-1 AAACCCAAGGTTCTTG-1 AAACCCAAGTCGGCCT-1 AAACCCAAGTCTGCGC-1 
##        P30.LPS.CTR       P30.LPS.TDT1            P06.TDT            P06.TDT 
## AAACCCAAGTTTGGCT-1 AAACCCACAAACTCGT-1 
##       P30.LPS.TDT2        P30.PBS.MIG 
## 12 Levels: Doublet Negative P30.PBS.TDT P30.PBS.MIG ... P06.CTR
GEX.clean$cnt.new <- factor(paste0(GEX.clean$age,
                                   ".",
                                   GEX.clean$cnt),
                            levels = c("P06.CTR","P06.MIG","P06.TDT",
                                       "P30.PBS.CTR","P30.PBS.MIG","P30.PBS.TDT",
                                       "P30.LPS.CTR","P30.LPS.MIG","P30.LPS.TDT"))
scales::show_col(color.FBraw, ncol = 5)

scales::show_col(color.FB, ncol = 5)

color.new <- color.FB[c(10,9,8,3,2,1,7,6,4)]
scales::show_col(color.new, ncol = 3)

VlnPlot(GEX.clean, features = c("Spi1","Spib","Elk1","Elf1",
                               "Elf2","Elf5","Gabpa","Erg"),
        group.by = "cnt.new", ncol = 3, pt.size = 0, cols = color.new)& geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.02)

VlnPlot(GEX.clean, features = c("Spi1","Spib","Elk1","Elf1",
                               "Elf2","Elf5","Gabpa","Erg"),
        group.by = "cnt.new", ncol = 3, pt.size = 0, cols = color.new) & geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55)& ylim(c(0,3.8)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("P06.CTR","P06.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.TDT"),
                                                  c("P30.LPS.CTR","P30.LPS.TDT"),
                                                  c("P06.CTR","P30.PBS.CTR"),
                                                  c("P30.PBS.CTR","P30.LPS.CTR"),
                                                  c("P06.TDT","P30.PBS.TDT"),
                                                  c("P30.PBS.TDT","P30.LPS.TDT"),
                                                  c("P06.CTR","P06.MIG"),
                                                  c("P06.MIG","P06.TDT"),
                                                  c("P30.PBS.CTR","P30.PBS.MIG"),
                                                  c("P30.PBS.MIG","P30.PBS.TDT"),
                                                  c("P30.LPS.CTR","P30.LPS.MIG"),
                                                  c("P30.LPS.MIG","P30.LPS.TDT")),
                               label.y = c(3.2,3.2,3.2,3.4,3.4,3.6,3.6,
                                           rep(2.8,6)),
                               size=2.5
                               )

mapal <- colorRampPalette(RColorBrewer::brewer.pal(4,"Spectral"))(120)
DotPlot(GEX.clean, features = c("Spi1","Spib","Elk1","Elf1",
                               "Elf2","Elf5","Gabpa","Erg"), group.by = "cnt.new",cols = c("lightgrey","red")
                         ) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(y="Conditions") + 
  scale_color_gradientn(colours  = rev(mapal))
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: Elf5, Erg

VlnPlot(GEX.clean, features = c("tdTomato-head","tdTomato-mid","tdTomato-tail","CreERT2"),
        group.by = "cnt.new", ncol = 2, cols = color.new, pt.size = 0) & geom_jitter(alpha=0.25, shape=16, width = 0.3 ,size = 0.22)
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

VlnPlot(GEX.clean, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
        group.by = "cnt.new", ncol = 2, cols = color.new, pt.size = 0) & geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)